R Markdown

loading the R packages

## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.0      ✔ purrr   1.0.0 
## ✔ tibble  3.1.8      ✔ dplyr   1.0.10
## ✔ tidyr   1.2.1      ✔ stringr 1.5.0 
## ✔ readr   2.1.3      ✔ forcats 0.5.2 
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## Linking to GEOS 3.9.3, GDAL 3.5.2, PROJ 8.2.1; sf_use_s2() is TRUE
## 
## 
## Attaching package: 'plotly'
## 
## 
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## 
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## 
## The following object is masked from 'package:graphics':
## 
##     layout
## 
## 
## Please note that rgdal will be retired during 2023,
## plan transition to sf/stars/terra functions using GDAL and PROJ
## at your earliest convenience.
## See https://r-spatial.org/r/2022/04/12/evolution.html and https://github.com/r-spatial/evolution
## rgdal: version: 1.6-3, (SVN revision 1187)
## Geospatial Data Abstraction Library extensions to R successfully loaded
## Loaded GDAL runtime: GDAL 3.5.2, released 2022/09/02
## Path to GDAL shared files: C:/Users/hajam/AppData/Local/R/win-library/4.2/rgdal/gdal
## GDAL binary built with GEOS: TRUE 
## Loaded PROJ runtime: Rel. 8.2.1, January 1st, 2022, [PJ_VERSION: 821]
## Path to PROJ shared files: C:/Users/hajam/AppData/Local/R/win-library/4.2/rgdal/proj
## PROJ CDN enabled: FALSE
## Linking to sp version:1.5-1
## To mute warnings of possible GDAL/OSR exportToProj4() degradation,
## use options("rgdal_show_exportToProj4_warnings"="none") before loading sp or rgdal.
## 
## Loading required package: spData
## 
## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source')`
## 
## Checking rgeos availability: TRUE
## Please note that 'maptools' will be retired during 2023,
## plan transition at your earliest convenience;
## some functionality will be moved to 'sp'.
## 
## rgeos version: 0.6-1, (SVN revision 692)
##  GEOS runtime version: 3.9.3-CAPI-1.14.3 
##  Please note that rgeos will be retired during 2023,
## plan transition to sf functions using GEOS at your earliest convenience.
##  GEOS using OverlayNG
##  Linking to sp version: 1.5-1 
##  Polygon checking: TRUE 
## 
## 
## 
## Attaching package: 'transformr'
## 
## 
## The following object is masked from 'package:sf':
## 
##     st_normalize
## 
## 
## Loading required package: Matrix
## 
## 
## Attaching package: 'Matrix'
## 
## 
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
## 
## 
## Loading required package: foreach
## 
## 
## Attaching package: 'foreach'
## 
## 
## The following objects are masked from 'package:purrr':
## 
##     accumulate, when
## 
## 
## Loading required package: parallel
## 
## This is INLA_23.01.02-1 built 2023-01-02 19:28:15 UTC.
##  - See www.r-inla.org/contact-us for how to get help.

Loading the data set

You can also embed plots, for example:

load("perfecture.rds")
glimpse(reduced_dataset)
## Rows: 1,128
## Columns: 5
## Groups: birth_year [24]
## $ birth_year <dbl> 1995, 1995, 1995, 1995, 1995, 1995, 1995, 1995, 1995, 1995,…
## $ prefecture <chr> "01", "02", "03", "04", "05", "06", "07", "08", "09", "10",…
## $ lbw        <dbl> 4087, 998, 887, 1604, 718, 783, 1652, 2206, 1681, 1383, 520…
## $ normal     <dbl> 45866, 12976, 12133, 20667, 9279, 10726, 19657, 26056, 1698…
## $ total      <dbl> 49953, 13974, 13020, 22271, 9997, 11509, 21309, 28262, 1866…
map <- read_sf("jpn_adm/jpn_admbnda_adm1_2019.shp")

loading shape file of Japan

ggplot(map) + geom_sf()

cleaning the map

map$ADM1_PCODE <- gsub( "JP", "", as.character(map$ADM1_PCODE))
map <- map %>% select(ADM1_EN, ADM1_PCODE, geometry)

cleaning the dataset

data_japan <- reduced_dataset %>% rename(ADM1_PCODE = prefecture)

Creating a dataframe for the SIR

d <- data_japan %>% select(ADM1_PCODE, birth_year, lbw)

Expected cases

data_japan <- data_japan[order(data_japan$ADM1_PCODE, data_japan$birth_year),]

checking the first several row

data_japan <- data_japan %>% select(ADM1_PCODE, birth_year, lbw, total)
data_japan[1:20,]
## # A tibble: 20 × 4
## # Groups:   birth_year [20]
##    ADM1_PCODE birth_year   lbw total
##    <chr>           <dbl> <dbl> <dbl>
##  1 01               1995  4087 49953
##  2 01               1996  4079 49794
##  3 01               1997  4108 48924
##  4 01               1998  4333 49065
##  5 01               1999  4217 46691
##  6 01               2000  4349 46791
##  7 01               2001  4224 46253
##  8 01               2002  4406 46122
##  9 01               2003  4169 44948
## 10 01               2004  4275 44029
## 11 01               2005  4073 41421
## 12 01               2006  4280 42206
## 13 01               2007  4150 41557
## 14 01               2008  3977 41077
## 15 01               2009  3726 40164
## 16 01               2010  3988 40168
## 17 01               2011  3847 39308
## 18 01               2012  3810 38692
## 19 01               2013  3780 38195
## 20 01               2014  3645 37068

Expected number calculation

library(SpatialEpi)
n.strata <- 1
E <- expected(population = data_japan$total, data_japan$lbw, n.strata = 1)

Creating a dataframe for expected counts

nyears <- length(unique(data_japan$birth_year))
admi1E <- rep(unique(data_japan$ADM1_PCODE), each = nyears)
ncountries <- length(unique(data_japan$ADM1_PCODE))
yearsE <- rep(unique(data_japan$birth_year), times = ncountries)

JapanE <- data.frame(ADM1_PCODE = admi1E, birth_year = yearsE, E = E)
head(JapanE)
##   ADM1_PCODE birth_year        E
## 1         01       1995 4584.754
## 2         01       1996 4570.161
## 3         01       1997 4490.311
## 4         01       1998 4503.252
## 5         01       1999 4285.363
## 6         01       2000 4294.541

SIR

d <- full_join(data_japan, JapanE, by = c("ADM1_PCODE", "birth_year"))

calculating SIR which is the observed divided by the expected

d$SIR <- d$lbw/d$E

Data check

SIR_data <- d %>% select (ADM1_PCODE, birth_year, SIR)
head(SIR_data)
## # A tibble: 6 × 3
## # Groups:   birth_year [6]
##   ADM1_PCODE birth_year   SIR
##   <chr>           <dbl> <dbl>
## 1 01               1995 0.891
## 2 01               1996 0.893
## 3 01               1997 0.915
## 4 01               1998 0.962
## 5 01               1999 0.984
## 6 01               2000 1.01

adding perfecture names

names_of <- read_delim("prefectures.txt", delim = ",")
## Rows: 47 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): id, prefecture_id, prefecture_en, prefecture_ja
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
names_of <- names_of %>% select(prefecture_id, prefecture_en) %>% rename(ADM1_PCODE = prefecture_id)

Joining the names

SIR_data <- full_join(SIR_data, names_of, by = "ADM1_PCODE")
save(SIR_data, file = "SIR_data.Rdata")

Plotting time-series

g <- ggplot(SIR_data, aes(x = birth_year, y = SIR, 
                   group = prefecture_en, color = prefecture_en)) +
  geom_line() + geom_point(size = 2) + theme_bw()
g

ggplotly(g)

Mapping

map <- full_join(SIR_data, map, by = "ADM1_PCODE")
map <- st_as_sf(map)

Creating an animation for the SIR change

ggplot(map) + geom_sf(aes(fill = SIR)) +
  theme_bw() +
  theme(
    axis.text.x = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks = element_blank()
  ) +
  scale_fill_gradient2(
    midpoint = 1, low = "blue", mid = "white", high = "red"
  ) +
  transition_time(birth_year) +
  labs(title = "Year: {round(frame_time, 0)}")

Estimating SIR using Bernardinelli model

Loading the Neigberhood Matrix

load("japanstuartv2.rdata")

Neighborhood matrix

head(pnb)
## [[1]]
## [1] 0
## 
## [[2]]
## [1] 3 5
## 
## [[3]]
## [1] 2 4 5
## 
## [[4]]
## [1] 3 5 6 7
## 
## [[5]]
## [1] 2 3 4 6
## 
## [[6]]
## [1]  4  5  7 15
nb2INLA("map.adj", pnb)
g <- inla.read.graph(filename = "map.adj")

Inference using INLA

d$idarea <- as.numeric(as.factor(d$ADM1_PCODE))
d$idarea1 <- d$idarea
d$idtime <- 1 + d$birth_year - min(d$birth_year)
formula <- d$lbw ~ f(idarea, model = "bym", graph = g) +
  f(idarea1, idtime, model = "iid") + idtime
res <- inla(formula,
  family = "poisson", data = d, E = E,
  control.predictor = list(compute = TRUE)
)
d$RR <- res$summary.fitted.values[, "mean"]
d$LL <- res$summary.fitted.values[, "0.025quant"]
d$UL <- res$summary.fitted.values[, "0.975quant"]
d$logrr <- log(d$RR)
d$exprr <- exp(d$RR)
d$ADM1_PCODE <- as.numeric(as.character(d$ADM1_PCODE))
map$ADM1_PCODE <- as.numeric(as.character(map$ADM1_PCODE))
map <- full_join(map, d, by = c("ADM1_PCODE", "birth_year"))
ggplot(map) + geom_sf(aes(fill = RR)) +
  theme_bw() +
  theme(
    axis.text.x = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks = element_blank()
  ) +
  scale_fill_gradient2(
    midpoint = 1, low = "blue", mid = "white", high = "red"
  ) +
  transition_time(birth_year) +
  labs(title = "Year: {round(frame_time, 0)}")